Functions
source_from_github(repositoy = "DEG_functions",version = "0.2.56")
## Loading required package: devtools
## Loading required package: usethis
## ℹ SHA-1 hash of file is d83bddf0b3c26bcb7107ed270f2e7da90f77c113
##
## Registered S3 method overwritten by 'ggforce':
## method from
## scale_type.units units
## Registered S3 method overwritten by 'ggtree':
## method from
## identify.gg ggfun
## clusterProfiler v4.2.2 For help: https://yulab-smu.top/biomedical-knowledge-mining-book/
##
## If you use clusterProfiler in published research, please cite:
## T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141
source_from_github(repositoy = "HMSC_functions",version = "0.1.14",script_name = "functions.R")
## ℹ SHA-1 hash of file is 9ec21e2e1fdc9a7b465400b3111b50804a136cf3
source_from_github(repositoy = "cNMF_functions",version = "0.4.04",script_name = "cnmf_functions_V3.R")
## ℹ SHA-1 hash of file is a735d9bfb5bc55fcb203f83a7240b9713fc080d3
## Loading required package: reticulate
source_from_github(repositoy = "sc_general_functions",version = "0.1.34",script_name = "functions.R")
## ℹ SHA-1 hash of file is 55839fb7db3fdb3e61d8d423b0b3129bfa1d777b
## Loading required package: RColorBrewer
library(hypeR)
source(input$fc_params)
library(hypeR)
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:igraph':
##
## normalize, path, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff, table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see 'citation("Biobase")', and for
## packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:rlang':
##
## exprs
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following object is masked from 'package:plyr':
##
## rename
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:Matrix':
##
## expand, unname
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:clusterProfiler':
##
## slice
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:purrr':
##
## reduce
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:plyr':
##
## join
## The following object is masked from 'package:stringr':
##
## boundary
## The following objects are masked from 'package:igraph':
##
## degree, edges, intersection
library(grid)
library(ComplexHeatmap)
## ========================================
## ComplexHeatmap version 2.10.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(ggpubr)
library(stringr)
library(data.table)
HMSC vs ACC
UMAP
DimPlot(hmsc_acc_pri_cancer,group.by = "patient.ident",label = T)

# calculate lum/myo spectrum
myo_genes = c("TP63", "TP73", "CAV1", "CDH3", "KRT5", "KRT14", "ACTA2", "TAGLN", "MYLK", "DKK3")
lum_genes = c("KIT", "EHF", "ELF5", "KRT7", "CLDN3", "CLDN4", "CD24", "LGALS3", "LCN2", "SLPI")
myoscore=FetchData(object =hmsc_acc_pri_cancer,vars = myo_genes,slot = "data") %>% rowMeans()
lescore=FetchData(object =hmsc_acc_pri_cancer,vars = lum_genes,slot = "data") %>% rowMeans()
hmsc_acc_pri_cancer=AddMetaData(hmsc_acc_pri_cancer,lescore-myoscore,"luminal_over_myo")
# plot
FeaturePlot(object = hmsc_acc_pri_cancer,features = "luminal_over_myo") & scale_color_gradientn(colours = c("blue"," grey","red"))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

FeaturePlot(object = hmsc_acc_pri_cancer,features = c("MYB"),pt.size = 1)

p = FeaturePlot(object = hmsc_acc_pri_cancer,features = c("kaye_acc_score"),pt.size = 1,combine = T)
p

pdf(paste0(params$data_out_dir,"/kaye_acc_score_AllCancerCells.pdf"))
p
dev.off()
## png
## 2
enrichment
analysis
hmsc_acc_pri_cancer = SetIdent(hmsc_acc_pri_cancer, value ="patient.ident")
deg <-
FindMarkers(
hmsc_acc_pri_cancer,
ident.1 = "HMSC",
features = VariableFeatures(hmsc_acc_pri_cancer),
densify = T,
assay = "RNA",
mean.fxn = function(x) {
return(log(rowMeans(x) + 1, base = 2)) # change fun to calculate logFC in log space data (default to exponent data)
}
)
library(biomaRt)
IDs <- rownames(deg)
genedesc <- getBM(attributes=c('external_gene_name','description'), filters = 'external_gene_name', values = IDs, mart =ensembl)
acc_deg_desc = left_join(x = rownames_to_column(deg),y = genedesc,by = c("rowname"= "external_gene_name"))
names(acc_deg_desc)[1] = "gene"
hmsc_up_genes = acc_deg_desc [acc_deg_desc$avg_log2FC>0,]
acc_up_genes = acc_deg_desc [acc_deg_desc$avg_log2FC<0,]
fwrite(hmsc_up_genes,file = params$data_out_dir %s+% "hmsc_up_genes.csv",row.names = F)
fwrite(acc_up_genes,file = params$data_out_dir %s+% "cc_up_genes.csv",row.names = F)
ranked_vec = deg[,"avg_log2FC"]%>% setNames(rownames(deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = geneIds(genesets_h),up_only = F)
plt = hyp_dots(hyp_obj,merge = T,fdr = 0.2)
plt+ scale_y_discrete(limits = plyr::arrange(plt$data,signature,significance)[,"label"])+
scale_colour_gradient2(low = "red",mid = "black",high = "black",midpoint = 0.1)
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

plt = hyp_dots(hyp_obj,merge = F,fdr = 0.2)
plt1 = plt$up+ aes(color=nes)+ggtitle("Up in HMSC")+ scale_colour_gradient2(low = "#E53935",mid = "white",high = "#E53935",midpoint = 0,limits = c(-3,3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
plt2 = plt$dn+ aes(color=nes)+ggtitle("Down in HMSC")+ scale_colour_gradient2(low = "#E53935",mid = "white",high = "#E53935",midpoint = 0,limits = c(-3,3))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
p = ggarrange(plt1,plt2,common.legend = T,nrow = 1,legend = "right")
p

# plot GSEA
args = list(
name = c("up","dn"),
color = c("dodgerblue","indianred2"),
title = c("Up in HMSC","Up in ACC")
)
p_list = list()
for (i in 1:2) {
name = args$name[i]
color = args$color[i]
title = args$title[i]
final_result = hyp_obj$data[[name]]$data
final_result = final_result %>% filter(fdr<0.2)
final_result$fdr = -log10(final_result$fdr)
p <- ggplot(data = final_result, aes(x = reorder(label,fdr),y = fdr)) +
geom_bar(stat = "identity", fill = color) +
coord_flip() +
geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "longdash", size = 0.8,alpha = 0.5)+
xlab("Pathway") +
scale_fill_manual(drop = FALSE) + ylab("FDR") +
geom_text(aes_string(label = "label", y = 0),
size = 4, color = "black", position = position_dodge(1),
hjust = 0) +
scale_y_continuous(labels = function(x) {parse(text = paste0("10^-",x))}, # set fdr format 10^-X
expand = expansion(mult = c(0, 0.1)))+ #expand to avoid cut off
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),axis.text.x = element_text(size = 10)) +
ggtitle(title)
p_list[[name]] = p
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
p = p_list[[1]]+p_list[[2]]
p

pdf(paste0(params$data_out_dir,"ACC_vs_HMSC_GSEA_no_filtering_barplot.pdf"),width = 10)
p
dev.off()
## png
## 2
Cell
cycle score
hallmark_name = "GO_MITOTIC_CELL_CYCLE"
acc_pri = FindVariableFeatures(object = acc_pri,nfeatures = 15000)
acc_pri = ScaleData(object = acc_pri,features = VariableFeatures(acc_pri))
## Centering and scaling data matrix
geneIds= genesets_h[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(acc_pri))
score <- apply(acc_pri@assays$RNA@scale.data[geneIds,],2,mean)
acc_pri=AddMetaData(acc_pri,score,hallmark_name)
hmsc_cancer_cells = FindVariableFeatures(object = hmsc_cancer_cells,nfeatures = 15000)
## Warning in FindVariableFeatures.Assay(object = assay.data, selection.method = selection.method, : selection.method set to 'vst' but count slot is
## empty; will use data slot instead
## Warning in eval(predvars, data, env): NaNs produced
## Warning in hvf.info$variance.expected[not.const] <- 10^fit$fitted: number of items to replace is not a multiple of replacement length
hmsc_cancer_cells = ScaleData(object = hmsc_cancer_cells,features = VariableFeatures(hmsc_cancer_cells))
## Centering and scaling data matrix
geneIds= genesets_h[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(hmsc_cancer_cells))
score <- apply(hmsc_cancer_cells@assays$integrated@scale.data[geneIds,],2,mean)
hmsc_cancer_cells=AddMetaData(hmsc_cancer_cells,score,hallmark_name)
acc_cc_scores = FetchData(object = acc_pri,vars = "GO_MITOTIC_CELL_CYCLE")
hmsc_cc_scores = FetchData(object = hmsc_cancer_cells,vars = "GO_MITOTIC_CELL_CYCLE")
distributions_plt = ggplot() +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "ACC"), alpha = .2, data = acc_cc_scores) +
geom_density(aes(GO_MITOTIC_CELL_CYCLE, fill = "HMSC"), alpha = .2, data = hmsc_cc_scores) +
scale_fill_manual(name = "Dataset", values = c(ACC = "red", HMSC = "green"))+ geom_vline(aes(xintercept=0.3),
color="blue", linetype="dashed", size=1) +ggtitle("'GO_MITOTIC_CELL_CYCLE' score distribution")
print_tab(plt = distributions_plt,title = "score distribution",subtitle_num = 3)
score distribution

hmsc_cc_cells = (sum(hmsc_cancer_cells@meta.data[[hallmark_name]]> 0.3) /ncol(hmsc_cancer_cells)) %>% round(digits = 3)*100
acc_cc_cells = (sum(acc_pri@meta.data[[hallmark_name]]> 0.3)/ncol(acc_pri)) %>% round(digits = 3)*100
df = data.frame(Dataset = c("HMSC","ACC"), cycling_cells_percentage = c(hmsc_cc_cells,acc_cc_cells))
cycling_cells_plt = ggplot(data=df, aes(x=Dataset, y=cycling_cells_percentage)) +
geom_text(aes(label=cycling_cells_percentage), vjust=0, color="black", size=3.5)+
geom_bar(stat="identity")+ylab(" % cycling cells")+
geom_bar(stat="identity", fill="steelblue")+
theme_minimal() + ggtitle("Cycling cells count")
print_tab(plt = cycling_cells_plt,title = "# cycling cells",subtitle_num = 3)
# cycling cells

pdf(paste0(params$data_out_dir,"/CC_distributions.pdf"))
distributions_plt
dev.off()
## png
## 2
pdf(paste0(params$data_out_dir,"/cycling_cells.pdf"))
cycling_cells_plt
dev.off()
## png
## 2
print_tab(plt =
FeaturePlot(hmsc_acc_pri_cancer,features = c("MKI67","CDK1","MCM2","CDC20"))
,title = "CC genes",subtitle_num = 3)
CC genes

Cycling cells filtering
#add score to all acc cancer cells
# geneIds= genesets[[hallmark_name]]@geneIds %>% intersect(VariableFeatures(all_acc_cancer_cells,assay = "integrated"))
# score <- apply(all_acc_cancer_cells@assays$integrated@scale.data[geneIds,],2,mean)
#add score to all acc cancer cells
cc_all = c(acc_pri$GO_MITOTIC_CELL_CYCLE, hmsc_cancer_cells$GO_MITOTIC_CELL_CYCLE) %>% as.data.frame()
hmsc_acc_pri_cancer=AddMetaData(hmsc_acc_pri_cancer,cc_all,hallmark_name)
#filter:
all_acc_cancer_cells_ccFiltered=hmsc_acc_pri_cancer[,hmsc_acc_pri_cancer@meta.data[[hallmark_name]]< 0.3]
min_threshold = min(hmsc_acc_pri_cancer$GO_MITOTIC_CELL_CYCLE)
max_threshold = max(hmsc_acc_pri_cancer$GO_MITOTIC_CELL_CYCLE)
library(viridis)
## Loading required package: viridisLite
p_before = FeaturePlot(object = hmsc_acc_pri_cancer,features = hallmark_name) +
ggtitle("Before cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1),
limits = c(min_threshold, max_threshold))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
print_tab(plt = p_before ,title = "Before CC filtering",subtitle_num = 3)
Before CC filtering

p_after = FeaturePlot(object = all_acc_cancer_cells_ccFiltered,features = hallmark_name) +
ggtitle("After cc filtering") &
scale_color_gradientn(colours = plasma(n = 10, direction = -1), limits = c(min_threshold, max_threshold))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
print_tab(plt = p_after, title = "After CC filtering" ,subtitle_num = 3)
After CC filtering

pdf(paste0(params$data_out_dir,"/ACC_vs_HMSC_cc_score_before.pdf"),width = 8,height = 5)
p_before
dev.off()
## png
## 2
pdf(paste0(params$data_out_dir,"/ACC_vs_HMSC_cc_score_after.pdf"),width = 8,height = 5)
p_after
dev.off()
## png
## 2
DEG
all_acc_cancer_cells_ccFiltered = SetIdent(all_acc_cancer_cells_ccFiltered, value ="patient.ident")
all_acc_cancer_cells_ccFiltered = FindVariableFeatures(all_acc_cancer_cells_ccFiltered,nfeatures = 15000,assay = "integrated")
## Warning in eval(predvars, data, env): NaNs produced
## Warning in hvf.info$variance.expected[not.const] <- 10^fit$fitted: number of items to replace is not a multiple of replacement length
deg <-
FindMarkers(
all_acc_cancer_cells_ccFiltered,
ident.1 = "HMSC",
features = VariableFeatures(all_acc_cancer_cells_ccFiltered,assay = "integrated"),
densify = T,
verbose = T,
slot = "data",
logfc.threshold = 0,
mean.fxn = function(x) {
return(log(rowMeans(x) + 1,base = 2)) # change func to calculate logFC in log space data (default to exponent data)
},
assay = "RNA"
)
ranked_vec = deg[,"avg_log2FC" ]%>% setNames(rownames(deg)) %>% na.omit() # make named vector
hyp_obj <-hypeR_fgsea(signature = ranked_vec,genesets = geneIds(genesets_h),up_only = F)
plt = hyp_dots(hyp_obj,merge = F,fdr = 0.2)
plt1 = plt$up+ aes(size=nes)+ggtitle("Up in HMSC")+
guides(
size = guide_legend(title="|NES|",reverse=T))
plt2 = plt$dn+ aes(size=nes)+ggtitle("Down in HMSC")+scale_size(trans = 'reverse')+
guides(
size = guide_legend(title="|NES|",reverse=F))
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
p = ggarrange(plt1,plt2,common.legend = T,nrow = 1,legend = "right")
p

pdf(paste0(params$data_out_dir,"/ACC_vs_HMSC_GSEA.pdf"),width = 12,height = 4)
p
dev.off()
## png
## 2
# plot GSEA
args = list(
name = c("up","dn"),
color = c("dodgerblue","indianred2"),
title = c("Up in HMSC","Up in ACC")
)
p_list = list()
for (i in 1:2) {
name = args$name[i]
color = args$color[i]
title = args$title[i]
final_result = hyp_obj$data[[name]]$data
final_result = final_result %>% filter(fdr<0.2)
final_result$fdr = -log10(final_result$fdr)
p <- ggplot(data = final_result, aes(x = reorder(label,fdr),y = fdr)) +
geom_bar(stat = "identity", fill = color) +
coord_flip() +
geom_hline(yintercept = -log10(0.05), colour = "black", linetype = "longdash", size = 0.8,alpha = 0.5)+
xlab("Pathway") +
scale_fill_manual(drop = FALSE) + ylab("FDR") +
geom_text(aes_string(label = "label", y = 0),
size = 4, color = "black", position = position_dodge(1),
hjust = 0) +
scale_y_continuous(labels = function(x) {parse(text = paste0("10^-",x))}, # set fdr format 10^-X
expand = expansion(mult = c(0, 0.1)))+ #expand to avoid cut off
theme(axis.text.y = element_blank(), axis.ticks.y = element_blank(),axis.text.x = element_text(size = 10)) +
ggtitle(title)
p_list[[name]] = p
}
p = p_list[[1]]+p_list[[2]]
p

pdf(paste0(params$data_out_dir,"ACC_vs_HMSC_GSEA_cc_filtered_barplot.pdf"),width = 10)
p
dev.off()
## png
## 2
volcano_plt = volcano_plot(de_genes = deg,fdr_cutoff = volcano_fdr,fc_cutoff = volcano_fc, ident1 = "HMSC",ident2 = "ACC",top_genes_text = 0)
volcano_plt

pdf(paste0(params$data_out_dir,"/volcano_plot_ACC_VS_HMSC.pdf"))
volcano_plt
dev.off()
## png
## 2
write.csv2(x = deg,file = paste0(params$data_out_dir,"HMSC_ACC_DEG.csv"))
deg$fdr = p.adjust(p = deg$p_val ,method = "fdr")
print("up in ACC:")
## [1] "up in ACC:"
deg %>% filter(avg_log2FC< (-volcano_fc) & fdr < volcano_fdr) %>% nrow()
## [1] 49
print("up in HMSC:")
## [1] "up in HMSC:"
deg %>% filter(avg_log2FC > (volcano_fc) & fdr < volcano_fdr) %>% nrow()
## [1] 94
correlation
acc_pri = FindVariableFeatures(object = acc_pri,nfeatures = 5000,assay = "RNA")
hmsc_cancer_cells = FindVariableFeatures(object = hmsc_cancer_cells,nfeatures = 5000,assay = "RNA")
common_genes = intersect(VariableFeatures(hmsc_cancer_cells,assay = "RNA"),VariableFeatures(acc_pri,assay = "RNA"))
all_cells_matrix = cbind(hmsc_cancer_cells@assays$integrated@data[common_genes,],acc_pri@assays$RNA@data[common_genes,]) %>% as.data.frame()
all_cells_pearson = cor(all_cells_matrix)
annotation = data.frame(dataset = c(rep("HMSC", ncol(hmsc_cancer_cells)), rep("ACC", ncol(acc_pri))),
row.names = c(colnames(hmsc_cancer_cells),colnames(acc_pri)))
row_ha = rowAnnotation(df = annotation, col = list(dataset = c(HMSC = "red",ACC = "green")))
ComplexHeatmap::Heatmap(matrix = all_cells_pearson ,name = "pearson", col = circlize::colorRamp2(seq(-1, 1, length = 3), c("blue", "#EEEEEE", "red")),show_column_names = F,show_row_names = F,right_annotation = row_ha,clustering_distance_columns = "euclidean",clustering_distance_rows = "euclidean",clustering_method_columns = "ward.D2",clustering_method_rows = "ward.D2")

acc_pri = FindVariableFeatures(object = acc_pri,nfeatures = 5000,assay = "RNA")
hmsc_cancer_cells = FindVariableFeatures(object = hmsc_cancer_cells,nfeatures = 5000,assay = "RNA")
common_genes = intersect(VariableFeatures(hmsc_cancer_cells,assay = "RNA"),VariableFeatures(acc_pri,assay = "RNA"))
all_cells_matrix = cbind(hmsc_cancer_cells@assays$integrated@data[common_genes,],acc_pri@assays$RNA@data[common_genes,]) %>% as.data.frame()
all_cells_pearson = cor(all_cells_matrix)
annotation = data.frame(dataset = c(rep("HMSC", ncol(hmsc_cancer_cells)), rep("ACC", ncol(acc_pri))),
row.names = c(colnames(hmsc_cancer_cells),colnames(acc_pri)))
row_ha = rowAnnotation(df = annotation, col = list(dataset = c(HMSC = "red",ACC = "green")))
ComplexHeatmap::Heatmap(matrix = all_cells_pearson ,name = "pearson", col = circlize::colorRamp2(seq(-1, 1, length = 3), c("blue", "#EEEEEE", "red")),show_column_names = F,show_row_names = F,right_annotation = row_ha,clustering_distance_columns = "pearson",clustering_distance_rows = "pearson",clustering_method_columns = "average",clustering_method_rows = "average")
